library(foreign)
library(plyr)
library(xtable)
library(arm)	
library(Amelia)
library(MASS)	
library(car)
library(gmodels)
library(reporttools)
library(sandwich)
library(lmtest)
library(boot)

# Load the raw district-appeals data from the Stata file.
all.appeals <- read.dta('Israel district appeals.dta')

# Recode every 99 as NA, in all columns. No rows are dropped at this point;
# this step only replaces values.
ninety.nines <- all.appeals == 99
all.appeals[ninety.nines] <- NA


############## drop irrelevant fields
# Names of the columns that are removed before any further processing.
leave.out <- c(
  "jarab1", "jfem1", "jreligiousity1",
  "jarab2", "jfem2", "jreligiousity2",
  "jarab3", "jfem3", "jreligiousity3",
  #        "arabs_J"       ,
  "prosecutor_arab", "prosecutor_female", "defender_public",
  "defender_arab", "defender_female", "accused_s_num",
  "accused_israeli", "magistrate_arab", "magistrate_female",
  "PublicOrder", "Property", "BodilyHarm",
  "Morality", "TrafficLicencing", "Fraud",
  "Regulatory", "Fiscal", "EconomicBusiness",
  "security", "deferred",
  "detained", "Report", "Prison_term", "Com_term",
  "Fine", "Compensation", "prison_reduced",
  "prison_increased", "Unanimous", "judge2_majority",
  "judge2_ruling", "judge3_majority", "distc1",
  "distc2", "distc3", "avg_jage",
  "avg_jexp", "methodPlea", "anyvictim",
  "victimRace1", "victimRace3",
  "victimRace4", "prequest1", "prequest3", "prequest4", "appellant3",
  'encoder', 'id', 'year', 'date', 'date2', 'accused_name',
  'judge1', 'judge2', 'judge3',
  'jYOB1', 'jYOB2', 'jYOB3',
  'jYON_M1', 'jYON_M2', 'jYON_M3',
  'jYON_D1', 'jYON_D2', 'jYON_D3',
  'accused_Num', 'accused_nationality', 'Magistrate_date', 'Magistrate',
  'location', 'method', 'Offences_new', 'security_type',
  'activating_defered_M', 'Report_positive', 'Prosecutorial_request',
  'victim', 'Appellant')

# Select by name rather than with -match(): match() returns NA for a name
# that is not in the data, and a negative index containing NA aborts with an
# unhelpful subscript error. Report absent names explicitly instead.
missing.cols <- setdiff(leave.out, colnames(all.appeals))
if (length(missing.cols) > 0) {
  warning("columns listed in leave.out but absent from the data: ",
          paste(missing.cols, collapse = ", "), call. = FALSE)
}
leave.in <- all.appeals[, !(colnames(all.appeals) %in% leave.out), drop = FALSE]

####### transform factors to logical indicator columns

# Outcome indicators derived from the verdict column.
leave.in$lenient <- leave.in$verdict == 'More lenient' | is.na(leave.in$verdict) #one NA and it should be more lenient
# NOTE(review): harsher is NA wherever verdict is NA. 'harsher' is one of the
# response variables checked for NAs further down, so the NA-verdict row that
# is recoded as lenient above is still removed by that filter -- confirm this
# is intended.
leave.in$harsher <- leave.in$verdict == 'Harsher'

# One indicator per court value tested below.
leave.in$court_Nazareth <- leave.in$court == 'Nazareth'
leave.in$court_TLV <- leave.in$court == 'TLV'
leave.in$court_Jerusalem <- leave.in$court == 'Jerusalem'

# Build an order-independent identifier for each panel of judges: missing
# codes become 0, the three codes are sorted within each row, and the sorted
# codes are joined with '.'.
judge.codes <- cbind(all.appeals$jcode1, all.appeals$jcode2, all.appeals$jcode3)
judge.codes[is.na(judge.codes)] <- 0 #get rid of NAs
#sort the codes
judge.codes <- t(apply(judge.codes, 1, sort))
leave.in$uniq.panel <- as.factor(paste(judge.codes[,1], judge.codes[,2], judge.codes[,3], sep = '.'))

#drop factor columns now that we have expanded them into dummies
# A logical mask is used instead of -which(): if none of the names matched,
# which() would return integer(0) and the negative index would select zero
# columns.
expanded.cols <- c('court', 'verdict', 'jcode1', 'jcode2', 'jcode3')
leave.in <- leave.in[, !(colnames(leave.in) %in% expanded.cols), drop = FALSE]

all.appeals <- leave.in


######### drop observations with NAs in key accused_arab and narabs_J columns

# Filter with logical masks. The previous x[-which(is.na(...)), ] form removes
# every row when there are no NAs, because which() then returns integer(0)
# and a zero-length negative index selects nothing.
all.appeals <- all.appeals[!is.na(all.appeals$accused_arab), , drop = FALSE]
all.appeals <- all.appeals[!is.na(all.appeals$narabs_J), , drop = FALSE]

# Replace the per-judge columns whose names contain 'jage' / 'jexp' with
# their row-wise means (NAs ignored).
age.idx <- grep('jage', colnames(all.appeals))
exp.idx <- grep('jexp', colnames(all.appeals))
# drop = FALSE keeps a data frame even if only one column matches, which
# rowMeans() requires.
all.appeals$avg_jage <- rowMeans(all.appeals[, age.idx, drop = FALSE], na.rm = TRUE)
all.appeals$avg_jexp <- rowMeans(all.appeals[, exp.idx, drop = FALSE], na.rm = TRUE)
# Keep every column that is not one of the averaged inputs; unlike a negative
# index this is a no-op (not "drop everything") when nothing matched.
kept.idx <- setdiff(seq_along(all.appeals), c(age.idx, exp.idx))
all.appeals <- all.appeals[, kept.idx, drop = FALSE]



##### handle missingness

#response variables
my.resp.vars <- c('lenient','harsher','incarceration','Prison_term_D')

#get NA counts per row: over all columns (response variables included) and
#over the response variables only
na.counts.cov <- rowSums(is.na(all.appeals))
na.counts.resp <- rowSums(is.na(all.appeals[my.resp.vars]))

#get rid of all rows with a response variable missing or more than two
#values missing overall
bad.apples <- which(na.counts.cov > 2 | na.counts.resp > 0)
# Guard the removal: with no offending rows, all.appeals[-integer(0), ] would
# return zero rows instead of leaving the data untouched.
if (length(bad.apples) > 0) {
  all.appeals <- all.appeals[-bad.apples, , drop = FALSE]
}


#####################  make Table 4 (variable HarshT in code) ##################### 

#table w/ proportions of harsher appeals
# Rows where appellant2 equals 1. which() discards NA comparisons; plain
# logical indexing would insert an all-NA row for every NA in appellant2 and
# turn every mean and count below into NA.
subdat <- all.appeals[which(all.appeals$appellant2 == 1), , drop = FALSE]

# Rows: narabs_J == 0, narabs_J == 1, column totals.
# Columns: accused_arab == 0, accused_arab == 1, row totals.
# The four inner cells are proportions of 'harsher'; the margins are counts.
HarshT <- with(subdat, rbind(
  c(mean(harsher[accused_arab == 0 & narabs_J == 0]),
    mean(harsher[accused_arab == 1 & narabs_J == 0]),
    sum(narabs_J == 0)),
  c(mean(harsher[accused_arab == 0 & narabs_J == 1]),
    mean(harsher[accused_arab == 1 & narabs_J == 1]),
    sum(narabs_J == 1)),
  c(sum(accused_arab == 0), sum(accused_arab == 1), nrow(subdat))))

colnames(HarshT) <- c("Jewish", "Arab","Total")
rownames(HarshT) <- c("All-Jewish", "Mixed-panel","Total")
#multiplicative factor to scale proportions up to percentages
# (filled column-wise: 100 for the four proportion cells, 1 for the counts)
scale.mat <- matrix(c(100,100,1,100,100,rep(1,4)), nrow = 3)
print("TABLE 4")
print(xtable(HarshT*scale.mat, digits=1))

##################### make Table 3 (variable LenientT in code) ##################### 

#table w/ proportions of more lenient appeals
# Rows where appellant1 equals 1. which() discards NA comparisons; plain
# logical indexing would insert an all-NA row for every NA in appellant1 and
# turn every mean and count below into NA.
subdat <- all.appeals[which(all.appeals$appellant1 == 1), , drop = FALSE]

# 2 x 2 block of percentages: rows are narabs_J == 0 / == 1, columns are
# accused_arab == 0 / == 1.
LenientT <- 100 * rbind(
  c(mean(subdat$lenient[subdat$accused_arab == 0 & subdat$narabs_J == 0]),
    mean(subdat$lenient[subdat$accused_arab == 1 & subdat$narabs_J == 0])),
  c(mean(subdat$lenient[subdat$accused_arab == 0 & subdat$narabs_J == 1]),
    mean(subdat$lenient[subdat$accused_arab == 1 & subdat$narabs_J == 1])))

#add marginal counts
LenientT <- cbind(LenientT, c(sum(subdat$narabs_J == 0), sum(subdat$narabs_J == 1)))
LenientT <- rbind(LenientT, c(sum(subdat$accused_arab == 0), sum(subdat$accused_arab == 1), nrow(subdat)))

colnames(LenientT) <- c("Jewish", "Arab", 'Total')
rownames(LenientT) <- c("All-Jewish", "Mixed-panel",'Total')

print("TABLE 3")
print(xtable(LenientT, digits=1))


#####################  make Table 7 (IncarcT in code) ##################### 

#table w/ proportions incarcerated by race
# Rows: narabs_J == 0, narabs_J == 1, column totals.
# Columns: accused_arab == 0, accused_arab == 1, row totals.
# Inner cells are means of 'incarceration'; the margins are counts.
IncarcT <- with(all.appeals, {
  panel.jewish <- narabs_J == 0
  panel.mixed <- narabs_J == 1
  accused.jewish <- accused_arab == 0
  accused.arab <- accused_arab == 1
  rbind(
    c(mean(incarceration[panel.jewish & accused.jewish]),
      mean(incarceration[panel.jewish & accused.arab]),
      sum(panel.jewish)),
    c(mean(incarceration[panel.mixed & accused.jewish]),
      mean(incarceration[panel.mixed & accused.arab]),
      sum(panel.mixed)),
    c(sum(accused.jewish), sum(accused.arab), nrow(all.appeals)))
})

dimnames(IncarcT) <- list(
  c('All-Jewish','Mixed-race','Total'),
  c('Jewish','Arab','Total'))

print("TABLE 7")
print(xtable(round(IncarcT, 2)))


##################### make Table 8 ##################### 
# NOTE(review): names2labels() is not defined in this script; presumably it
# comes from the file sourced here -- confirm.
source('colnames2labels.R')
#drop two irrelevant columns
# Name-based mask: a column that is absent is simply skipped. The previous
# -which(colnames(...) == 'num') form would have removed every column if
# 'num' were missing, because which() would return integer(0).
all.appeals <- all.appeals[, !(colnames(all.appeals) %in% c('arabs_J', 'num')), drop = FALSE]
appeals <- all.appeals
# uniq.panel is left out of the data passed to names2labels().
appeals.latex <- names2labels(appeals[, colnames(appeals) != 'uniq.panel', drop = FALSE])

print("TABLE 8")
print(tableContinuous(vars = appeals.latex, stats =c("mean", "s", "min", "median","max", "n"),cap = "Descriptive Statistics: District Appeals Sample, 2007-2011", lab = "tab:DescriptiveTable"))